*** Jetson Leder-Luis
*** January, 2023
*** Can Whistleblowers Root Out Public Expenditure Fraud? Evidence from Medicare

*** Prepared for REStat 

*** This file prepares data for analysis of health effects in Appendix G


global basepath "/homes/nber/jetson-dua52260/poterba-DUA52260/jetson-dua52260/kyphohealth"
global codepath "$basepath/code"
global logpath "$codepath/logs"
global resultspath "$basepath/results"
global datapath "$basepath/data"
global rawpath "/disk/aging/medicare/data/harm/100pct"

log using "$logpath/KyphoHealthClean.log", append  


*** Grab data from all ever-osteo patients in bsf-cc
foreach YEAR of numlist 2002/2015{
	disp `YEAR'
	use bene_id osteoprs using "$rawpath/bsfcc/`YEAR'/bsfcc`YEAR'.dta" if inlist(osteoprs, 1, 3), clear 
	duplicates drop bene_id, force 
	count 
	drop osteoprs
	gen year = `YEAR'
	save  "$datapath/bsf_osteo_ID_`YEAR'.dta", replace 
}

** Make the full panel; so if a patient is Osteoflag -> No osteoflag -> Osteoflag
** they are our data for all 3 years
clear
foreach YEAR of numlist 2002/2015{
	append using "$datapath/bsf_osteo_ID_`YEAR'.dta"
}
count
egen long ben = group(bene_id)
tsset ben year 
tsfill 
sort ben year 
carryforward bene_id, replace 
drop ben 

save "$datapath/osteo_idpanel.dta", replace 

*** Clean bsf-cc (no bene_id dupes)
foreach YEAR of numlist 2002/2015	{
	disp `YEAR'
	use "$rawpath/bsfcc/`YEAR'/bsfcc`YEAR'.dta" if inlist(osteoprs, 1, 3)
	duplicates drop bene_id, force 
	save  "$datapath/osteo_bsfcc_`YEAR'.dta", replace 
	** Note that this means we only have ccs for years where the patient has osteoporosis
		** This is okay because we only use ccs in the patient's base (first osteoporosis) year
}

*** Clean bsf-ab (no bene_id dupes)
foreach YEAR of numlist 2002/2015	{
	disp `YEAR'
	use "$rawpath/bsf/`YEAR'/bsfab`YEAR'.dta"
	gen year = `YEAR'
	merge m:1 bene_id year using "$datapath/osteo_idpanel.dta", keep(match) nogen
	duplicates drop bene_id, force 
	save  "$datapath/osteo_bsfab_`YEAR'.dta", replace 
}


*** Merge in bsf and bsf-cc
foreach YEAR of numlist 2002/2015	{
	disp `YEAR'
	use "$datapath/osteo_idpanel.dta", clear 
	keep if year == `YEAR'
	merge 1:1 bene_id using "$datapath/osteo_bsfcc_`YEAR'.dta", keep (master match) nogen
	merge 1:1 bene_id using "$datapath/osteo_bsfab_`YEAR'.dta", keep(master match) nogen
	save  "$datapath/bsf_osteo_`YEAR'.dta", replace 
}

** Clean relevant IP claims
foreach YEAR of numlist 2002/2015{
	use bene_id drg_cd admsndt dschrgdt using "$rawpath/med/`YEAR'/med`YEAR'.dta", clear

	gen year = `YEAR'
	** Keep only osteoprs patient claims
	merge m:1 bene_id year using "$datapath/osteo_idpanel.dta", keep(match) keepusing(bene_id year) nogen

	** Make staylength var
	gen staylength = dschrgdt - admsndt

	** Drop 2016 fiscal year data due to DRG change 
	drop if dschrgdt >= mdy(10, 1, 2015)

	***Crosswalk DRGs so we can do cross-sectional analysis 
	gen after_sample = 1 if dschrgdt >= mdy(10,1,2007)
	replace after_sample = 0 if  dschrgdt < mdy(10,1,2007)

	rename drg_cd code 
	gen msdrg = code if after_sample == 1
	gen drg_cd = code if after_sample == 0

	*** Merge into groups
	merge m:1 drg_cd using "$datapath/drgcrosswalkgrouped_pre.dta", keep(master match) nogen
	cap rename group group_pre

	merge m:1 msdrg using "$datapath/drgcrosswalkgrouped_post.dta", keep(master match) nogen
	cap rename group group_post

	gen group = min(group_pre, group_post)

	** Only keep relevant stays 
	gen kyphostay = 1 if inlist(group, 154, 170) & staylength <= 7 
	gen anystay = 1 
	

	collapse( sum) kyphostay (sum) anystay, by(bene_id)

	gen year = `YEAR'

	save  "$datapath/ip_osteo_`YEAR'.dta", replace 
}

** Clean relevant OP claims
foreach YEAR of numlist 2002/2015{
	use bene_id hcpcs_cd using "$rawpath/op/`YEAR'/opr`YEAR'.dta", clear

	gen year = `YEAR'

	merge m:1 bene_id year using "$datapath/osteo_idpanel.dta", keep(match) keepusing(bene_id year) nogen

	destring hcpcs_cd, replace force 

	gen kypho_vertebro = 1 if hcpcs_cd >= 22510 & hcpcs_cd <= 22515
	gen surgicalspine =1 if hcpcs >= 22010 & hcpcs <= 22899
	gen otherspine =1 if hcpcs_cd == 22899

	gen anyOPcount = 1 	

	collapse (sum) kypho_vertebro (sum) surgicalspine (sum) otherspine (sum) anyOPcount, by(bene_id)

	gen year = `YEAR'

	save  "$datapath/op_osteo_`YEAR'.dta", replace 

}

** Perform a large merge 
foreach YEAR of numlist 2002/2015{
	use "$datapath/osteo_idpanel.dta", clear 
	keep if year == `YEAR'
	merge 1:1 bene_id using "$datapath/bsf_osteo_`YEAR'.dta", keep(master match) nogen
	merge 1:1 bene_id using "$datapath/ip_osteo_`YEAR'.dta", keep(master match) nogen
	merge 1:1 bene_id using "$datapath/op_osteo_`YEAR'.dta", keep(master match) nogen

	save "$datapath/osteo_`YEAR'_ip_op_bsf.dta", replace 
}
*/
clear
** Append to a single file
foreach YEAR of numlist 2002/2015{
	append using "$datapath/osteo_`YEAR'_ip_op_bsf.dta"
}
save "$datapath/osteopanel.dta", replace 



** Build death dates in for all 
* In each year, replace death date with newest year's info if not missing
use bene_id using "$datapath/osteo_idpanel.dta", clear 
duplicates drop
gen deathdateclean= . 
foreach YEAR of numlist 2002/2015{
	merge 1:1 bene_id using "$rawpath/bsf/`YEAR'/bsfab`YEAR'.dta", keep(master match) keepusing(death_dt) nogen
	replace deathdateclean = min(deathdateclean, death_dt)
	format deathdateclean %td
	drop death_dt 
}
save "$datapath/deathdates.dta"


** Merge death dates for all 
use "$datapath/osteopanel.dta", clear 
merge m:1 bene_id using "$datapath/deathdates.dta"
save "$datapath/osteopanel_death.dta", replace 






